Results presented in “Soil biodiversity and chemical stressors: global review and roadmap for future research”.
Figures are created from the script “FIGURES.R”.
library(dplyr)
# load packages for maps
library(maptools)
library(ggmap)
library(rnaturalearth)
library(countrycode)
library(viridis)
library(ggplot2)
library(knitr)
library(patchwork)
library(reshape2)
`%notin%` = Negate(`%in%`)
# DATA--------
metadatreview <- read.csv("Data/References_pollution_review.csv")
nrow(metadatreview)
[1] 274
Are there geographical bias and what regions are poorly covered ?
Distribution of studies included by country
Countries most represented?
stud <- metadatreview %>%
group_by(Country) %>%
summarize(no.stu = n())
Factor `Country` contains implicit NA, consider using `forcats::fct_explicit_na`
multicountrystud <- stud[grepl(";|,", stud$Country),]
# Manually add multiple countries studies:
addingcountries <- data.frame(rbind(c("Brazil", 1), c("Portugal", 1),
c("Canada", 1), c("france", 1), c("The Netherlands", 1), c("Switzerland", 1),
c("Germany", 1), c("The Netherlands", 1), c("UK", 1), c("Portugal", 1),
c("Indonesia", 1),c("Malaysia", 1),
c("UK", 1),
c("The Netherlands", 1), c("Belgium", 1), c("france", 1), c("germany", 1)))
colnames(addingcountries) <- colnames(stud)
addingcountries$no.stu <- as.numeric(addingcountries$no.stu)
stud <- rbind(as.data.frame(stud), addingcountries)
# harmonize country names
stud <- stud %>%
mutate(iso = countrycode(stud$Country, origin = "country.name", destination = "iso3c")) %>%
dplyr::group_by(iso) %>%
summarize(no.stud = sum(no.stu, na.rm = TRUE))
Some values were not matched unambiguously: , Brazil;Portugal, Canada;France;Netherlands;Switzerland, England, Germany, The Netherlands, UK, Portugal, Indonesia, Malaysia, Scotland, UK, The Netherlands; Belgium; France; Germany
Some strings were matched more than once, and therefore set to <NA> in the result: Brazil;Portugal,BRA,PRT; Canada;France;Netherlands;Switzerland,CAN,FRA,NLD,CHE; Germany, The Netherlands, UK, Portugal,DEU,NLD,PRT; Indonesia, Malaysia,IDN,MYS; The Netherlands; Belgium; France; Germany,BEL,FRA,DEU,NLD
stud[stud$no.stud > 20,]
summary(metadatreview$Published.Year)
Min. 1st Qu. Median Mean 3rd Qu. Max.
1991 2003 2009 2008 2014 2018
whichPollutionType <- c("Metals", "PAH", "Pesticides", "Plastics/Plasticizers","Pharmaceuticals","Radiation", "Nanoparticles", "Others")
iterations <- length(whichPollutionType)
variables <- 1
output <- matrix(ncol=variables, nrow=iterations)
for(i in 1:iterations){
output[i,] <- length(grep(whichPollutionType[i], ignore.case = TRUE, as.character(metadatreview$PollutantType)))
}
output <- data.frame(PollutionType = whichPollutionType,
no.studies = output)
kable(output)
| PollutionType | no.studies |
|---|---|
| Metals | 159 |
| PAH | 19 |
| Pesticides | 89 |
| Plastics/Plasticizers | 2 |
| Pharmaceuticals | 14 |
| Radiation | 3 |
| Nanoparticles | 2 |
| Others | 7 |
NA
Cumulative number of studies on each type of pollutant studied through time
whichPollutionSource <- c("Agricultural/livestock","Industrial","Mining/Smelting","Military/Wars", "Natural/geogenic","Waste/Sewage","Urban/Transport","Others")
iterations <- length(whichPollutionSource)
variables <- 1
output <- matrix(ncol=variables, nrow=iterations)
for(i in 1:iterations){
output[i,] <- length(grep(whichPollutionSource[i], ignore.case = TRUE, as.character(metadatreview$PollutionSource)))
}
output <- data.frame(output)
output$PollutionSource <- whichPollutionSource
kable(output)
| output | PollutionSource |
|---|---|
| 100 | Agricultural/livestock |
| 31 | Industrial |
| 73 | Mining/Smelting |
| 9 | Military/Wars |
| 3 | Natural/geogenic |
| 23 | Waste/Sewage |
| 18 | Urban/Transport |
| 37 | Others |
NA
# sum up industrial/mining/waste (overlaps)
sum(output$output[output$PollutionSource=="Industrial"],
output$output[output$PollutionSource=="Mining/Smelting"])
[1] 104
What are the “others” in Pollutant type category: - salts - explosives
metadatreview$PollutantName[metadatreview$PollutantType=="Others"]
[1] salt: chloride and sodium salt
[3] chemical warfare agents
195 Levels: ...
# create new pol type where = mixture when more than one category of pollutants are involved
metadatreview$PollutantType2 <- as.factor(ifelse(grepl(",", metadatreview$PollutantType), "Mixture", as.character(metadatreview$PollutantType)))
summary(metadatreview$PollutantType2)
Metals
143
Mixture
17
Nanoparticles
2
Others
3
PAH (Polycyclic aromatic hydrocarbons)
8
Pesticides
85
Pharmaceuticals
13
Radionuclides/Radiation
3
When accounting for mixture across AND within pollutant categories: most studies focus on multiple chemicals. Indeed, half of the studies are observational, and most contaminations and pollutions involve multiple chemicals in real-world scenarios.
# create new pol type where = mixture when more than one category of pollutant is involved (for plot purposes)
metadatreview$PollutantType3 <-as.factor(ifelse(
# mixtures across and within categoreis of stressors
grepl(",|;|and|several|many|multiple|nanomaterials|metals|cides|carbons|PAHs|agents|residues", metadatreview$PollutantName)|
grepl(",", metadatreview$PollutantType), "Mixtures",
as.character(metadatreview$PollutantType)))
summary(metadatreview$PollutantType3)
Metals
34
Mixtures
185
Nanoparticles
1
Others
1
PAH (Polycyclic aromatic hydrocarbons)
3
Pesticides
40
Pharmaceuticals
7
Radionuclides/Radiation
3
whichTaxa <- c("Protists","Tardigrades","Rotifers","Nematodes","Enchytraeids","Acari","Collembola","Protura","Diplura","Pseudoscorpions","Ants","Termites","Isopoda","Myriapoda","Earthworms","Coleoptera","Arachnida","Gastropoda","Larvae","Others")
iterations = length(whichTaxa)
variables = 1
output <- matrix(ncol=variables, nrow=iterations)
for(i in 1:iterations){
output[i,] <- length(grep(whichTaxa[i], ignore.case = TRUE, as.character(metadatreview$TaxaGroupAtlas)))
}
output <- data.frame(output)
output$Taxagroup <- whichTaxa
output$no.studies <- output$output
output$Taxagroup2 <- reorder(output$Taxagroup, output$output)
ggplot(output, aes(x = Taxagroup2, y = no.studies))+
geom_col()+
theme_bw() +
theme(axis.text.y=element_text(face = "bold", size = rel(2)),
axis.text.x=element_text(angle = 45, hjust = 1, size = rel(2)),
axis.title.y = element_text(size=18, face = "bold"),
#legend
legend.title = element_blank() ,
legend.text = element_text(size=16),
legend.position = "bottom",
#for the facets
strip.text.x = element_text(size=16, face = "bold"),
strip.background = element_rect(colour="black", fill="white"))
kable(output[,1:2])
| output | Taxagroup |
|---|---|
| 7 | Protists |
| 4 | Tardigrades |
| 2 | Rotifers |
| 119 | Nematodes |
| 36 | Enchytraeids |
| 95 | Acari |
| 87 | Collembola |
| 13 | Protura |
| 17 | Diplura |
| 12 | Pseudoscorpions |
| 26 | Ants |
| 5 | Termites |
| 32 | Isopoda |
| 46 | Myriapoda |
| 81 | Earthworms |
| 55 | Coleoptera |
| 47 | Arachnida |
| 16 | Gastropoda |
| 36 | Larvae |
| 38 | Others |
NA
metadatreview$Macrofaune <- factor(ifelse(grepl("Ants|Termites|Isopoda|Myriapoda|Earthworms|Coleoptera|Arachnida|Gastropoda",
ignore.case = TRUE, as.character(metadatreview$TaxaGroupAtlas)), "Macrofauna", "Not macrofauna"))
summary(metadatreview$Macrofaune)
Macrofauna Not macrofauna
106 168
Alluvial diagram showing that a few emblematic taxonomic groups and pollutant types dominate research
What is the scope of studies on chemical stressors impacts on natural soil fauna communities?
Studies addressing topics relevant to reach an ecosystem perspective
# count the no. of commas in the list
# subset dataframe keeping only paperID and drivers
df_taxo <- subset(metadatreview, select = c(X, TaxaGroupAtlas))
# split multiple taxa and pollutants into separate rows with strsplit
new_df_taxolist <- strsplit(as.character(df_taxo$TaxaGroupAtlas), ',')
new_df_combi_taxa <- data.frame(
TaxaGroupAtlas = unlist(new_df_taxolist),
PaperID = rep(df_taxo$X, sapply(new_df_taxolist, FUN = length)))
# no. of taxo group per paper
new_df_counts <- new_df_combi_taxa %>% group_by(PaperID) %>% summarise(no.taxa = n())
# descriptive stats
summary(new_df_counts)
PaperID no.taxa
Min. : 1.00 Min. : 1.000
1st Qu.: 69.25 1st Qu.: 1.000
Median :137.50 Median : 1.000
Mean :137.50 Mean : 2.836
3rd Qu.:205.75 3rd Qu.: 3.000
Max. :274.00 Max. :14.000
hist(new_df_counts$no.taxa)
table(new_df_counts$no.taxa)
1 2 3 4 5 6 7 8 9 10 11 12 13 14
161 32 16 9 8 10 5 9 4 9 4 3 1 3
# Calculate the percentage of each group
new_df_counts <- new_df_counts %>%
group_by(no.taxa) %>%
summarise(no.studies = n(), Percent = n()/nrow(.) * 100)
kable(new_df_counts)
| no.taxa | no.studies | Percent |
|---|---|---|
| 1 | 161 | 58.7591241 |
| 2 | 32 | 11.6788321 |
| 3 | 16 | 5.8394161 |
| 4 | 9 | 3.2846715 |
| 5 | 8 | 2.9197080 |
| 6 | 10 | 3.6496350 |
| 7 | 5 | 1.8248175 |
| 8 | 9 | 3.2846715 |
| 9 | 4 | 1.4598540 |
| 10 | 9 | 3.2846715 |
| 11 | 4 | 1.4598540 |
| 12 | 3 | 1.0948905 |
| 13 | 1 | 0.3649635 |
| 14 | 3 | 1.0948905 |
NA
NA
Search the abstracts of the studies for foodweb, food-web, trophic: 4 papers have foodweb in the abstract, only one paper has it in the title, 43 have trophic in the abstract (nematodes), 2 in the title.
# number of papers with those keywords in the abstract
whichKeywords <- c("foodweb|food-web")
iterations = length(whichKeywords)
variables = 1
output <- matrix(ncol=variables, nrow=iterations)
for(i in 1:iterations){
output[i,] <- length(grep(whichKeywords[i], ignore.case = TRUE, as.character(metadatreview$abstract)))
}
output <- data.frame(output)
output$Keyword <- whichKeywords
output
# number of papers with those keywords in the title
whichKeywords <- c("foodweb|food-web", "trophic")
iterations = length(whichKeywords)
variables = 1
output2 <- matrix(ncol=variables, nrow=iterations)
for(i in 1:iterations){
output2[i,] <- length(grep(whichKeywords[i], ignore.case = TRUE, as.character(metadatreview$title)))
}
output2 <- data.frame(output2)
output2$Keyword <- whichKeywords
output2
NA
whichDiv <- c("Abundance","Biomass","Richness","Shannon","Evenness", "Others")
iterations = length(whichDiv)
variables = 1
output <- matrix(ncol=variables, nrow=iterations)
for(i in 1:iterations){
output[i,] <- length(grep(whichDiv[i], ignore.case = TRUE, as.character(metadatreview$DiversityMetric)))
}
output <- data.frame(output)
output$Measurement <- whichDiv
output
metadatreview <- metadatreview %>%
mutate(OtherBiodiversity = factor(ifelse(OtherBiodiversity=="lichens", "plants", # lichens in the plant category for simplification
as.character(OtherBiodiversity))))
# Calculate the percentage of each group
metadatreviewsummary <- metadatreview %>%
group_by(OtherBiodiversity) %>%
summarise(no.studies = n(), Percent = n()/nrow(.) * 100)
kable(metadatreviewsummary)
| OtherBiodiversity | no.studies | Percent |
|---|---|---|
| aboveground invertebrates | 2 | 0.7299270 |
| microbes | 37 | 13.5036496 |
| no | 198 | 72.2627737 |
| plants | 20 | 7.2992701 |
| plants,aboveground invertebrates | 1 | 0.3649635 |
| plants,microbes | 16 | 5.8394161 |
# Calculate the percentage of studies that have no,, one or several functions (based on full text screening)
metadatreviewsummary2 <- metadatreview %>%
group_by(EcosystemFunction) %>%
summarise(n(), Percent = n()/nrow(.) * 100)
kable(metadatreviewsummary2)
| EcosystemFunction | n() | Percent |
|---|---|---|
| none | 231 | 84.306569 |
| one | 27 | 9.854015 |
| several | 16 | 5.839416 |
# how many papers have function in the abstract?
subsetEF <- metadatreview[grepl("function", metadatreview$abstract),]
nrow(subsetEF)
[1] 59
# removing "functional groups" (nematodes studies often reported this)
nrow(subsetEF[!grepl("functional", subsetEF$abstract),])
[1] 23
# create the multiple stressors var
metadatreview <- metadatreview %>%
mutate(Multistressors = factor(ifelse(grepl(",", PollutantType), "Multiple stressors",
"Single stressor")),
Multidrivers = factor(ifelse(LandUse=="TRUE"|Intensification=="TRUE"|Fragmentation.Loss=="TRUE"|Nutrient=="TRUE"|ClimateChange=="TRUE"|Invasives=="TRUE", "yes", "no")))
# Calculate the percentage of each group
metadatreviewsummary3 <- metadatreview %>%
group_by(Multidrivers) %>%
summarise(no.studies = n(), Percent = n()/nrow(.) * 100)
# create the list of vector each corresponding to a separate circle
multiGCDpapers <- data.frame(no.stu = t(
data.frame(
withLUI = nrow(metadatreview[metadatreview$Intensification == "TRUE", ]),
withLUC = nrow(metadatreview[metadatreview$LandUse == "TRUE", ]),
withInvas = nrow(metadatreview[metadatreview$Invasives == "TRUE", ]),
withClim = nrow(metadatreview[metadatreview$ClimateChange == "TRUE", ]),
withNut = nrow(metadatreview[metadatreview$Nutrient == "TRUE", ]),
withFragm = nrow(metadatreview[metadatreview$Fragmentation.Loss == "TRUE", ])
)
))
multiGCDpapers$OtherDrivers <- rownames(multiGCDpapers)
kable(multiGCDpapers)
| no.stu | OtherDrivers | |
|---|---|---|
| withLUI | 28 | withLUI |
| withLUC | 22 | withLUC |
| withInvas | 1 | withInvas |
| withClim | 5 | withClim |
| withNut | 37 | withNut |
| withFragm | 0 | withFragm |
NA
NA
summary(metadatreview$ExperimentObservation)
Experimental
1 133
Experimental,Observational Observational
1 139
whichSystem <- c("Grassland","Woodland","Cropland","Wetland","Artifical","Bare land","Shrubland","Others")
iterations = length(whichSystem)
variables = 1
output <- matrix(ncol=variables, nrow=iterations)
for(i in 1:iterations){
output[i,] <- length(grep(whichSystem[i], ignore.case = TRUE, as.character(metadatreview$System)))
}
output <- data.frame(output)
output$System <- whichSystem
output$no.studies <- output$output
output$System2 <- reorder(output$System, output$output)
ggplot(output, aes(x = System2, y = no.studies))+
geom_col()+
theme_bw() +
theme(axis.text.y=element_text(face = "bold", size = rel(2)),
axis.text.x=element_text(angle = 45, hjust = 1, size = rel(2)),
axis.title.y = element_text(size=18, face = "bold"),
#legend
legend.title = element_blank() ,
legend.text = element_text(size=16),
legend.position = "bottom",
#for the facets
strip.text.x = element_text(size=16, face = "bold"),
strip.background = element_rect(colour="black", fill="white"))
NA
NA
# # 22 studies have bioindicator in the abstract
whichKeywords <- c("bioindicator|bio-indicator")
iterations = length(whichKeywords)
variables = 1
output <- matrix(ncol=variables, nrow=iterations)
for(i in 1:iterations){
output[i,] <- length(grep(whichKeywords[i], ignore.case = TRUE, as.character(metadatreview$abstract)))
}
output <- data.frame(output)
output$Keyword <- whichKeywords
output
# list those papers
BioIndicStudiesAbstract <- metadatreview[grepl("bioindicator", metadatreview$abstract),]
BioIndicStudiesTitle <- metadatreview[grepl("bioindicator", metadatreview$title),]
BioIndicStudiesTitle
The list of papers focusing on plants, microbes and soil fauna was used to investigate whether the studies addressed indirect effects of chemical stressors, and if they considered foodweb or network approaches.
# create a table with all studies that have at least one EF
WhichVars <- c("authors", "Published.Year", "journal", "title", "DOI", "Country", "DiversityMetric", "PollutantName", "PollutantType", "PollutionSource", "TaxaGroupAtlas", "System", "OtherBiodiversity", "EcosystemFunction", "Multidrivers")
MBdata <- metadatreview[metadatreview$OtherBiodiversity != "no",WhichVars]
write.csv(MBdata, "Output/TablePapersWithMultiDiv.csv")
The list of papers having at least one function was used to investigate what type of functions and ecosystem services were addressed in those studies.
# create a table with all studies that have at least one EF
WhichVars <- c("authors", "Published.Year", "journal", "title", "DOI", "Country", "DiversityMetric", "PollutantName", "PollutantType", "PollutionSource", "TaxaGroupAtlas", "System", "OtherBiodiversity", "EcosystemFunction", "Multidrivers")
EFdata <- metadatreview[metadatreview$EcosystemFunction != "none",WhichVars]
write.csv(EFdata, "Output/TablePapersWithEF.csv")
The list of papers having multiple global change drivers was used to investigate how often studies had a full factorial design enabling to address interactive effects.
WhichVars <- c("authors", "Published.Year", "journal", "title", "DOI", "Country", "DiversityMetric", "PollutantName", "PollutantType", "PollutionSource", "TaxaGroupAtlas", "System", "OtherBiodiversity", "EcosystemFunction", "Multidrivers",
"ClimateChange", "Nutrient", "LandUse",
"Intensification", "Fragmentation.Loss", "Invasives")
MultiGCDdata <- metadatreview[metadatreview$Multidrivers != "no",WhichVars]
write.csv(MultiGCDdata, "Output/TablePapersWithMultiGCDs.csv")